

#######
## This script requires proprietary data on AP3 FIPS-to-FIPS mapping

require(sf)
require(sp)
require(rgeos)
require(rgdal)
require(tidycensus)
require(readxl)
require(haven)
require(foreign)
require(abind)
require(readxl)
require(doSNOW)
require(iterators)
require(foreach)


WORK = file.path('Z:/user/ajk41/Solar siting/Scripts/JAERE Code Repository/Data JAERE')
WORK.OUT = file.path(WORK, 'intermediate_output')
dir.create(WORK.OUT)
dir.create(file.path(WORK.OUT, 'Damages'))

#################

source(file=file.path(dirname(WORK),'jDeltaEmissions_functions_v4.R')) 
using.crs =   3395


#### Load results (coefficients) ####
# Later Years (mid-sample onward):           LaterYearsCoefs.rds
# WECC Bootstrap:                            BlistWECC_compiled.rds and BlistWECC_compiledMain.rds
# TRE Bootstrap:                             BlistTexas.rds
# Point estimates for all interconnections:  usingBetasProcessed.rds <--- main results here

plant.coef = readRDS(file.path(WORK.OUT, 'usingBetasProcessed.rds'))
BlistTRE = readRDS(file.path(WORK.OUT, 'BlistTexas.rds'))
BlistWECC = readRDS(file.path(WORK.OUT, 'BlistWECC_Compiled.rds'))
LaterYears = readRDS(file.path(WORK.OUT, 'LaterYearsCoefs.rds'))
  empties = unlist(lapply(LaterYears, function(ii) 'character' %in% class(ii))) # Each of the elements of LaterYears is a unique set of year-months of operation. When cut to the later years, some plants no longer have any operating record (e.g. closed) and thus (1) cannot have their response estimated, and (2) should not have their response estimated as they no longer respond.
  LaterYears = LaterYears[!empties]


#### Load ancillary data, zip-level solar insolation, PM2.5 conversion, FIPS maps ####
# zip-level 8760 hour TMY3 annual solar insolation.
fzip = fread(file.path(WORK.OUT, 'solargen_zip_hour_CEC.csv'),
             col.names = c('junk','zip',paste0('hr_',1:8760)))[,-1][,zip:=sprintf("%05d", zip)]

zip_to_NERC = readRDS(file.path(WORK, 'TEZ_mapping_zip_to_NERCsub.rds'))

fzip = merge(x = fzip, y=zip_to_NERC, by='zip', all.x=F, all.y=F)
WEST.INT = c('AZNM','CAMX','NWPP','RMPA'); TRE.INT = c('ERCT')
fzip[,INT:=ifelse(pcac%in%TRE.INT, 'TRE',
                  ifelse(pcac%in%WEST.INT, 'WEST', 'EAST'))]
setcolorder(fzip, c('zip','pcac','INT',paste0('hr_',1:8760)))


#### FIPS mapping ####
FIPS = readOGR(dsn=file.path(WORK, 'FIPS Shapefiles'), layer='cb_2013_us_county_500k')
FIPS@data$fips = as.character(FIPS@data$GEOID)
proj.fips = proj4string(FIPS)


#### GLOAD-to-PM25 conversion ####
# PM25 emissions are constant proportion to GLOAD
# GLOAD-to-PM25 conversion is sourced from data reported to eGrid and compiled here:
PM25 = readRDS(file.path(WORK, 'PM25_factor.rds'))

## Get Plant ID (ORISPL) to FIPS (so we know where plant-level emissions occur; AP3 is emissions damages by county of source)
Ot = read_excel(path=file.path(WORK,'eGRID', 'eGRID2012_Data.xlsx'), sheet='PLNT12', skip=4)[,c('PNAME','UTLSRVNM','ORISPL','NERC','LON','LAT')]
Ot = as.data.frame(unique(Ot[which(!Ot$NERC%in%c('ASCC','HICC')),]))
Ot[which(Ot$ORISPL==7253),c('LON','LAT')] = c(-81.5208,39.3672) # Missing lon-lat, but can be easily found.
Ot = rbind(Ot, data.frame(PNAME = 'UNK DC PLANT', UTLSRVNM = 'UNK DC UTILITY',NERC = 'RFC', ORISPL=880004,LON=38.889931, LAT=-77.009003)) # also missing from eGrid
# Ot = Ot[Ot$ORISPL%in%use.ORISPL,]
Ot = Ot[!is.na(Ot$ORISPL),]

Ot$LAT[Ot$LAT==0] = NA
Ot$LON[Ot$LON==0] = NA

Ot = Ot[!is.na(Ot$LON) & !is.na(Ot$LAT),]
# Ot = Ot[which(Ot$ORISPL%in%use.ORISPL),]
coordinates(Ot) = ~LON + LAT
proj4string(Ot) = CRS("+init=epsg:4326")
Ot = spTransform(Ot, proj.fips)

tres = over(Ot, FIPS[,c('NAME','fips')])
Ot@data = cbind(Ot@data, tres)
Ot@data$NAME = as.character(Ot@data$NAME)
Ot@data[which(Ot$ORISPL==880004),c('NAME','fips')] = c('DC','11001')
Ot@data[which(Ot$ORISPL==1843),c('NAME','fips')] = c('Marquette','26519')
Ot$ORISPL_CODE = paste0('ORISPL_',Ot$ORISPL)




#### AP3 Load ####
stop('AP3 FIPS-to-FIPS mappings are proprietary')
AP3.criteria = c('NOx', 'SO2','PM25', 'CO2') #--! Add NH3 as well for AP3; has to be exact format as .txt's (NOx...)
AP3.names = sprintf("%05d", read.dta(file=file.path(WORK, 'AP3/AP2_Fips_List.dta'))[1:3109,'fips']) # Nick's stata file didn't have FIPS in it; said to use the first 3,109 lines of this file
AP3 = lapply(list.files(path=file.path(WORK, 'AP3'), pattern='_Matrix', full.names = T), 
             function(z) {
               zz = as.matrix(as.data.frame(fread(z)))
               dimnames(zz) = list(AP3.names, as.character(AP3.names))
               # row.names(zz) = AP3.names
               return(zz)
             })
AP3 = array(unlist(AP3), c(3109, 3109, length(AP3)), dimnames = list(AP3.names, AP3.names,sapply(strsplit(list.files(path=file.path(BASE, 'ajk41/Solar siting/Data/AP'), pattern='_Matrix'), '_'),'[', 1)))
AP3 = abind(AP3, CO2=matrix(41/dim(AP3)[2], nrow=dim(AP3)[1], ncol=dim(AP3)[2])) # Distribute $41/ton benefits over all FIPS evenly? # v6d: $41/ton.
AP3 = AP3[,,AP3.criteria] #--> drops NH3, VOC, etc.



saveRDS(AP3, file.path(WORK.OUT, 'AP3.rds'))




#### Process Main Coefficients to Damages ####
cl <- snow::makeCluster(rep('localhost',60), type='SOCK') 
registerDoSNOW(cl)


p.c.S. = plant.coef %>% 
  left_join(PM25 %>% dplyr::select(ORISPL, PM2_factor), by='ORISPL') %>%
  dplyr::filter(isSOL=='qr.SOL' & !grepl('SOL_', PCA)) %>%
  dplyr::filter(GLOAD_MASS > quantile(GLOAD_MASS, .01) & GLOAD_MASS < quantile(GLOAD_MASS, .99)) %>%  #<-- unstable extremes removed. Symmetric at around -1.4/+1.4
  dplyr::mutate(PM2_MASS = GLOAD_MASS*PM2_factor,
                WECCiter = 1) %>% # WECCiter is here as a placeholder for non-WECC runs.
  dplyr::select(-PM2_factor, -isSOL) %>%
  setDT()


# Divide zipcodes up into chunks for multicore
fzip[,randdraw:=sample(1:100, NROW(fzip), replace=T)]
fzip[,rd:=paste0(pcac, '-', randdraw)]



ptm = Sys.time() 
sr1 <- foreach(zz=isplit(fzip, f=fzip$rd, drop=T), .inorder=F, .errorhandling='pass',
               .packages=c('data.table','abind'),  .noexport=c('sr'),
               .verbose=T) %dopar%{
                 jSummaryRes(res = jDeltaEmissions(zz$value, plant.coef=p.c.S.),
                             AP = AP3,
                             cw = unique(Ot@data[,c('ORISPL_CODE','fips')]), # this should have all of the plants in it.
                             detail=3) #--> detail=3 returns 4 rows for each zip w/1 col for each FIPS
               }
Sys.time()-ptm  # 11 hours using dopar!

sr1 = rbindlist(sr1)
sr1[,total_damages:=apply(sr1[,3:NCOL(sr1)], MAR=1, sum)]
setcolorder(sr1, c('zip','POLL','total_damages'))
sr1 = sr1[order(zip, POLL),]

# Save the point-estimate, all-zip-all-poll-all-FIPS data (to use as dzip in SummaryImpacts_v4d)
saveRDS(sr1, file.path(WORK.OUT, 'Damages','CEC_to_damages_by_POLL.rds'))
stopCluster(cl)





#### Process TRE Texas Coefficients to Damages (across bootstraps - nested foreach) ####
# register parallel backend
cl <- snow::makeCluster(rep('localhost',20), type='SOCK')
registerDoSNOW(cl)

BlistTRE = BlistTRE[which(!unlist(lapply(BlistTRE, is.logical)))]
BlistTRE = rbindlist(BlistTRE)
BlistTRE = BlistTRE %>%  # Merge in the PM25 created above (11-19ish-2019 good)
  left_join(PM25 %>% dplyr::select(ORISPL, PM2_factor), by='ORISPL') %>%
  dplyr::filter(isSOL=='qr.SOL' & !grepl('SOL_', PCA)) %>%
  dplyr::filter(GLOAD_MASS > quantile(GLOAD_MASS, .01) & GLOAD_MASS < quantile(GLOAD_MASS, .99)) %>%  #<-- unstable extremes removed. Symmetric cutoffs.
  dplyr::mutate(PM2_MASS = GLOAD_MASS*PM2_factor) %>%
  dplyr::select(-PM2_factor, -isSOL) %>%
  setDT()  # Check for NA's

sum(is.na(BlistTRE$PM2_MASS))

BlistTRE = split(BlistTRE, f = BlistTRE$iterB) # Split for iteration for boots

fz = fzip[pcac=='ERCT',]
fz[,delta.NERC:=pcac]  # duplicate field for functions
fz[,rd:=sample(1:10, NROW(fz), replace=T)]

# Only one pcac; use rd to split up zipcodes. 750*10 = 7500
ptm = Sys.time() 
sr.pcac <- foreach(zzz = isplit(fz[delta.NERC=='ERCT',], f = fz[delta.NERC=='ERCT',]$rd, drop=T),.inorder=F, .noexport=c('sr'),.errorhandling='pass', .packages=c('data.table','abind'), .verbose=T) %:%
  foreach(p.c.S=iter(BlistTRE, by='cell'), .inorder=F, .errorhandling='pass', .packages=c('data.table','abind'), .noexport=c('sr'), .verbose=T) %dopar%{
    sr = jSummaryRes(res = jDeltaEmissions(delta.gen = zzz$value, plant.coef=as.data.table(p.c.S)),
                     AP = AP3,
                     cw = unique(Ot@data[,c('ORISPL_CODE','fips')]), # line 83, ORSPL and ORISPL_CODE
                     detail=2) #--> 
    sr[,iterB:=p.c.S$iterB[1]]
    return(sr)
  } # end dopar foreach
Sys.time()-ptm  # 11 hours using dopar!

saveRDS(rbindlist(lapply(sr.pcac, rbindlist)), file.path(WORK.OUT, 'Damages', 'TexasBootDamages.rds'))
stopCluster(cl)







#### Process WECC Coefficients to Damages (across bootstraps - nested foreach) ####
# register parallel backend
cl <- snow::makeCluster(rep('localhost',20), type='SOCK')
registerDoSNOW(cl)


BlistWECC = BlistWECC %>% 
  left_join(PM25 %>% dplyr::select(ORISPL, PM2_factor), by='ORISPL') %>%
  # dplyr::filter(isSOL=='qr.SOL' & !grepl('SOL_', PCA)) %>% # WECC code drops solar coefs.
  dplyr::filter(GLOAD_MASS > quantile(GLOAD_MASS, .01) & GLOAD_MASS < quantile(GLOAD_MASS, .99)) %>%  #<-- unstable extremes removed. Symmetric 
  dplyr::mutate(PM2_MASS = GLOAD_MASS*PM2_factor,
                WECCiter = paste0(WECCrun, '--', iterB)) %>%
  dplyr::select(-PM2_factor, -WECCrun) 

BlistWECC = split(BlistWECC, f = BlistWECC$WECCiter)

## Cut to N=750, since that's the size of Texas bootstraps we did with Texas ##
Bindex = sample(1:length(Blist), 750, replace=F)
BlistWECC = BlistWECC[Bindex]

fzip[,delta.NERC:=pcac]

# Do one PCAC at a time. Each one must be done over all B bootstraps (this will take a long)
pcacs = c('RMPA','AZNM','CAMX','NWPP')

for(use.pcac in pcacs){ 
print(use.pcac)
ptm = Sys.time() 
sr.pcac <- foreach(zzz = isplit(fzip[delta.NERC==use.pcac,], f = fzip[delta.NERC==use.pcac,]$delta.NERC, drop=T),.inorder=F, .noexport=c('sr'),.errorhandling='pass', .packages=c('data.table','abind'), .verbose=T) %:%
            foreach(p.c.S=iter(Blist, by='cell'), .inorder=F, .errorhandling='pass', .packages=c('data.table','abind'), .noexport=c('sr'), .verbose=T) %dopar%{
    sr = jSummaryRes(res = jDeltaEmissions(delta.gen = zzz$value, plant.coef=as.data.table(p.c.S)),
                     AP = AP3,
                     cw = unique(Ot@data[,c('ORISPL_CODE','fips')]), # line 83, ORSPL and ORISPL_CODE
                     detail=T) #--> detail=T returns array for each zip with rows=ORISPL, col=FIPS, stack=Pollutant #--> I think I broke detail=F!
    sr[,iterB:=p.c.S$WECCiter[1]]
    return(sr)
  } # end dopar foreach
Sys.time()-ptm

saveRDS(sr.pcac, file.path(WORK.OUT, 'Damages', paste0('WECCBootDamages_',use.pcac,'.rds')))
} # end use.pcac loop

stopCluster(cl)








#### Process Later Years Coefficients to Damages (No Bootstraps) ####
cl <- snow::makeCluster(rep('localhost',60), type='SOCK')
registerDoSNOW(cl)

# LaterYears = readRDS(file.path(BASE, 'ajk41/Solar siting/Data/OUT_2019-10-24','LaterYearsCoefs----2019-10-24.rds')) 
LaterYears = rbindlist(LaterYears[unlist(lapply(LaterYears, function(i)!'character' %in% class(i)))]) %>%
  dplyr::filter(isSOL=='qr.SOL') %>%
  dplyr::filter(!grepl('SOL_', PCA))


p.c.S. = LaterYears %>%
  left_join(PM25 %>% dplyr::select(ORISPL, PM2_factor), by='ORISPL') %>%
  dplyr::filter(isSOL=='qr.SOL' & !grepl('SOL_', PCA)) %>%
  dplyr::filter(GLOAD_MASS > quantile(GLOAD_MASS, .01) & GLOAD_MASS < quantile(GLOAD_MASS, .99)) %>%  #<-- unstable extremes removed. Symmetric 
  dplyr::mutate(PM2_MASS = GLOAD_MASS*PM2_factor,
                WECCiter = 1) %>%
  dplyr::select(-PM2_factor, -isSOL) %>%
  setDT()

sum(is.na(p.c.S.$PM2_factor))
setdiff(LaterYears$ORISPL, PM25$ORISPL)

fzip[,randdraw:=sample(1:200, NROW(fzip), replace=T)]
fzip[,rd:=paste0(pcac, '-', randdraw)]

ptm = Sys.time() 
srl <- foreach(zz=isplit(fzip, f=fzip$rd, drop=T), .inorder=F, .errorhandling='pass',
               .packages=c('data.table','abind'),  .noexport=c('sr'),
               .verbose=T) %dopar%{
                 jSummaryRes(res = jDeltaEmissions(zz$value, plant.coef=p.c.S.),
                             AP = AP3,
                             cw = unique(Ot@data[,c('ORISPL_CODE','fips')]), # this should have all of the plants in it.
                             detail=2) #--> detail=3 returns 4 rows for each zip w/1 col for each FIPS
               }
Sys.time()-ptm 


saveRDS(rbindlist(srl), file.path(WORK.OUT, 'Damages','CEC_to_damages_later_by_POLL.rds'))
stopCluster(cl)



